*********************************************************************
**************************DATA CONSTRUCTION: EDGAR******************* 
*********************************************************************


// SO2 EMISSIONS BY COUNTRY

cd "D:\Replication\"

* Unzip the file from https://cidportal.jrc.ec.europa.eu/ftp/jrc-opendata/EDGAR/datasets/v50_AP/SO2/v50_SO2_1970_2015.zip

* Sector-level sheet: read the table in A10:AZ9794, taking the first row of the
* range as the header, and keep only the rows for China.
import excel "v50_SO2_1970_2015\v50_SO2_1970_2015.xls", sheet("v5.0_EM_SO2_IPCC2006") cellrange(A10:AZ9794) firstrow clear

keep if ISO_A3=="CHN"

* Columns G-AZ hold the emission values. Each one is converted to numeric and
* renamed to emission_ followed by its variable label (the header cell).
foreach v of varlist G-AZ {
   * Only a string column can contain the text NULL. Comparing a column that
   * was imported as numeric with "NULL" would stop the script with a type
   * mismatch, so the clean-up is applied to string columns only.
   capture confirm string variable `v'
   if !_rc {
      replace `v' = "" if `v' == "NULL"
      destring `v', replace
   }
   local x : variable label `v'
   rename `v' emission_`x'
}

save "so2_country_sector.dta", replace

* Country-total sheet: read the table in A10:AX229 (first row of the range is
* the header) and keep only the rows for China.
import excel "v50_SO2_1970_2015\v50_SO2_1970_2015.xls", ///
   sheet("TOTALS BY COUNTRY") cellrange(A10:AX229) firstrow clear

keep if ISO_A3 == "CHN"

* Columns E-AX hold the emission values: make each numeric and rename it to
* emission_ followed by its variable label (the header cell).
foreach col of varlist E-AX {
   local suffix : variable label `col'
   destring `col', replace
   rename `col' emission_`suffix'
}

save "so2_country_total.dta", replace

* Stack the sector rows underneath the country-total data currently in memory.
append using "so2_country_sector.dta"

* Observation 1 comes from the data that was in memory before the append, so
* it is tagged as the country total.
* NOTE(review): assumes the totals sheet yields exactly one CHN row - confirm.
replace IPCC = "total" in 1
replace IPCC_description = "country total" in 1

* One row per IPCC code and year; the suffix of each emission_ variable is
* read as a string first and converted to a number right after.
reshape long emission_, i(IPCC) j(year) string
destring year, replace
rename emission_ emission

* Drop the country identifiers (only CHN rows were kept) and place the
* description next to its code.
drop IPCCAnnex WorldRegion ISO_A3 Name
order IPCC_description, after(IPCC)

save "so2_country_combine.dta", replace


// SO2 EMISSIONS GRIDMAPS

** Step 0: Data conversion: convert the netCDF files to .dta files using "netCDF conversion.R"

** Step 1: Combine EDGAR SO2 emissions datasets

* 1. total SO2 emission: 1970-2015

cd "D:\Replication\TOTALS_nc"

* Rename so2 in every yearly file to a year-specific name so the files can be
* merged side by side. The yearly files are overwritten in place, so a file
* that already carries the year-specific name (for example after an earlier
* run of this script) is left untouched; a rerun then no longer depends on
* so2 being resolved through variable-name abbreviation.
forval i = 1970/2015 {
	use "so2_total_`i'.dta", clear
	capture confirm variable so2_total_`i', exact
	if _rc {
		rename so2 so2_total_`i'
		save "so2_total_`i'.dta", replace
	}
}

* Start from 1970 and add one column per later year; keep(match) retains only
* the lon/lat cells that are present in every yearly file.
use so2_total_1970.dta, clear

forval i = 1971/2015 {
	merge 1:1 lon lat using "so2_total_`i'.dta", keep(match) nogen
}

* Clear all variable labels.
foreach var of varlist _all {
	label var `var' ""
}

save "D:\Replication\so2_total_wide.dta", replace
  

* 2. SO2 emission from the power industry: 1970-2015
cd "D:\Replication\ENE_powerindustry_nc"

* Rename so2 in every yearly file to a year-specific name so the files can be
* merged side by side. The yearly files are overwritten in place, so a file
* that already carries the year-specific name (for example after an earlier
* run of this script) is left untouched; a rerun then no longer depends on
* so2 being resolved through variable-name abbreviation.
forval i = 1970/2015 {
	use "so2_power_`i'.dta", clear
	capture confirm variable so2_power_`i', exact
	if _rc {
		rename so2 so2_power_`i'
		save "so2_power_`i'.dta", replace
	}
}

* Start from 1970 and add one column per later year; keep(match) retains only
* the lon/lat cells that are present in every yearly file.
use so2_power_1970.dta, clear

forval i = 1971/2015 {
	merge 1:1 lon lat using "so2_power_`i'.dta", keep(match) nogen
}

* Clear all variable labels.
foreach var of varlist _all {
	label var `var' ""
}

save "D:\Replication\so2_power_wide.dta", replace
  
  
* 3. SO2 emissions from the manufacturing industry: 1970-2015
cd  "D:\Replication\IND_manufacturing_nc"

* Rename so2 in every yearly file to a year-specific name so the files can be
* merged side by side. The yearly files are overwritten in place, so a file
* that already carries the year-specific name (for example after an earlier
* run of this script) is left untouched; a rerun then no longer depends on
* so2 being resolved through variable-name abbreviation.
forval i = 1970/2015 {
	use "so2_manu_`i'.dta", clear
	capture confirm variable so2_manu_`i', exact
	if _rc {
		rename so2 so2_manu_`i'
		save "so2_manu_`i'.dta", replace
	}
}

* Start from 1970 and add one column per later year; keep(match) retains only
* the lon/lat cells that are present in every yearly file.
use so2_manu_1970.dta, clear

forval i = 1971/2015 {
	merge 1:1 lon lat using "so2_manu_`i'.dta", keep(match) nogen
}

* Clear all variable labels.
foreach var of varlist _all {
	label var `var' ""
}

save "D:\Replication\so2_manu_wide.dta", replace


* 4. Merge datasets

cd "D:\Replication\"

* Begin with the total-emission grid, then attach the manufacturing and power
* grids cell by cell; only cells found in all three files are kept.
use "so2_total_wide.dta", clear
foreach sector in manu power {
	merge 1:1 lon lat using "so2_`sector'_wide.dta", keep(match) nogenerate
}

* Prefix the coordinates with edgar_ and number the grid cells by their
* position in the merged data.
rename (lon lat) (edgar_lon edgar_lat)
generate edgar_id = _n

save "so2_edgar_combine_wide.dta", replace


** Step 2: Match Edgar emissions data to CHNS communities

* Requires the user-written geonear package (ssc install geonear).
use "community_location.dta", clear
rename (Longitude Latitude) (chns_lon chns_lat)
generate chns_id = _n

* For every CHNS community, look up the nearest EDGAR grid cell.
geonear chns_id chns_lat chns_lon using so2_edgar_combine_wide.dta, ///
	n(edgar_id edgar_lat edgar_lon)

* nid holds the id of the nearest EDGAR cell; rename it so it can serve as the
* key for pulling in the emission variables of that cell.
rename nid edgar_id
merge m:1 edgar_id using so2_edgar_combine_wide.dta, keep(match) nogenerate

* Identifiers and coordinates go first.
order *_id *_lon *_lat, first

save "so2_match_wide.dta", replace


* Reshape from wide to long: one row per community (commid) and year.
reshape long so2_total_ so2_manu_ so2_power_, i(commid) j(year)

* Remove the trailing underscore left over from the reshape stubs.
rename (so2_total_ so2_manu_ so2_power_) (so2_total so2_manu so2_power)

* Annual emissions: multiply each value by 3600*24*365.
* NOTE(review): presumably the inputs are per-second rates, making this the
* number of seconds in a 365-day year - confirm the EDGAR units.
foreach sector in total manu power {
	generate so2_`sector'_ann = so2_`sector'*3600*24*365
}

* WAVE is a copy of year.
generate WAVE = year

save "so2_match_long.dta", replace
